In [1]:
%pylab
%matplotlib inline


Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib

In [2]:
cd ..


/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-net-work

In [3]:
import sys
import numpy as np
import skimage
import cv2
import sklearn
# scikit-learn submodules used below need to be imported explicitly
import sklearn.cross_validation
import sklearn.preprocessing
import sklearn.metrics
import sklearn.linear_model
import sklearn.svm
import imp

In [4]:
from holoviews import *


:0: FutureWarning: IPython widgets are experimental and may change in the future.

In [5]:
import neukrill_net.utils
import neukrill_net.highlevelfeatures

In [6]:
import time

In [7]:
settings = neukrill_net.utils.Settings('settings.json')

In [8]:
X,y = settings.flattened_train_paths(settings.classes)

In [9]:
hlf = neukrill_net.highlevelfeatures.ContourHistogram()

In [10]:
t0 = time.time()
XF = hlf.transform(X)
print("Computing features took {}".format(time.time()-t0))


Computing features took 24.684898138

In [11]:
XF.shape


Out[11]:
(1, 30336, 25)
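
The transform returns a 3D array whose leading axis has length 1 (a single feature extractor), so the classifiers below work on the squeezed 2D design matrix. A quick check, assuming the shape above:

X_design = XF.squeeze(0)  # drop the leading singleton axis
print(X_design.shape)     # (30336, 25): 25 contour-histogram features per image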

In [25]:
sklearn.externals.joblib.dump((hlf,XF,y),'cache/contourhistogram.pkl')


Out[25]:
['cache/contourhistogram.pkl', 'cache/contourhistogram.pkl_01.npy']
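
The cached features can later be loaded back without recomputing them; a minimal sketch, assuming the cache path above:

hlf, XF, y = sklearn.externals.joblib.load('cache/contourhistogram.pkl')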

Naive Bayes


In [12]:
import sklearn.naive_bayes

In [13]:
clf = sklearn.naive_bayes.GaussianNB()

In [14]:
t0 = time.time()
# Standardise the features and make a 50/50 train/test split
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

# Held-out accuracy and multiclass log loss
print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))


Time=0.0431101322174
Accuracy=0.148668248945
Logloss=13.7297668781
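
The standardise / split / fit / score boilerplate above is repeated verbatim for each classifier below. A small helper along these lines (hypothetical, not part of the original notebook, but using the same sklearn.cross_validation calls) would avoid the duplication:

def evaluate(clf, XF, y, test_size=0.5, random_state=42):
    # Standardise features, split 50/50, fit clf and report time, accuracy and log loss
    t0 = time.time()
    X_scaled = sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0))
    X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
        X_scaled, y, test_size=test_size, random_state=random_state)
    clf.fit(X_train, y_train)
    print("Time={}".format(time.time() - t0))
    print("Accuracy={}".format(clf.score(X_test, y_test)))
    print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))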

Logistic Regression


In [15]:
clf = sklearn.linear_model.LogisticRegression(random_state=42)

In [16]:
t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))


Time=14.3167510033
Accuracy=0.269251054852
Logloss=3.03321936069

Random Forest


In [17]:
import sklearn.ensemble

In [18]:
clf = sklearn.ensemble.RandomForestClassifier(n_estimators=1000, max_depth=20, min_samples_leaf=5)

t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))


Time=109.571610928
Accuracy=0.392009493671
Logloss=2.37714812517

Linear SVC


In [21]:
clf = sklearn.svm.SVC(kernel='linear', probability=True, random_state=42)

t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))


Time=978.63530302
Accuracy=0.217497362869
Logloss=3.18116353584

Non-linear SVC (RBF kernel)

one-vs-one (SVC's default multiclass scheme)


In [22]:
clf = sklearn.svm.SVC(probability=True, random_state=42)

t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))


Time=498.594195843
Accuracy=0.332608122363
Logloss=2.55835715175
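
SVC handles multiclass problems with a one-vs-one scheme by default, which is what the sub-heading refers to. For comparison, the same RBF SVC could be wrapped in a one-vs-rest scheme instead; a sketch, not run here:

import sklearn.multiclass

clf = sklearn.multiclass.OneVsRestClassifier(
    sklearn.svm.SVC(probability=True, random_state=42))
# fit and score exactly as above, e.g. with the evaluate() helper sketched earlier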